*****************
**Construct merged dataset
*****************

* Set the project root; every path used below is relative to this directory.
* NOTE(review): this absolute path is machine-specific -- adjust before running elsewhere.
cd "/Users/cbarrie6/Dropbox/pd_projects/irq_mos_dataverse"
use "data/raw/Arab Transformations Data Set STATA Version PA20170503.dta", clear

*remove all countries except Iraq (COUNTRY code 3)
drop if COUNTRY!=3

* Interview-date distribution around 10 June 2014.
* Stata treats missing as larger than any number, so every ">" comparison
* below explicitly excludes missing interview dates.
tab DATEINT
count if DATEINT<=20140610
* 877
count if DATEINT>20140610 & !missing(DATEINT)
* 736

//Sample essentially split between before and after Fall of Mosul
//Pre- and Post- on attitudes to democracy
//Pre- and Post- on attitudes toward Islam

***********
**Generate treatment period dummy
***********
* treat: 1 if interviewed after 10 June 2014, 0 if on or before that date,
* missing if the interview date itself is missing.
gen treat = DATEINT>20140610 if !missing(DATEINT)
label define treatlabel 0 "No Treatment" 1 "Treatment"
label values treat treatlabel

*treatment with June 10 dropped a la Munoz et al. (2018)
* treat1 stays missing for interviews on 10 June itself and for missing dates.
gen treat1 = .
replace treat1=0 if DATEINT<20140610
replace treat1=1 if DATEINT>20140610 & !missing(DATEINT)

*gen June 6 treatment
* treat2: same construction as treat, using 6 June 2014 as the cut-off.
gen treat2 = DATEINT>20140606 if !missing(DATEINT)

***********
**Sectarian dummies and basic respondent covariates
***********

* Inspect sectarian identification (V83)
codebook V83
* 704 Sunni, 859 shiite, 50 unidentified
codebook V83 if treat==1
* Evenly distributed across the two periods

* shia: 1 = Shi'i (V83==5), 0 = Sunni (V83==4), code 99 -> missing
recode V83 (5=1 "Shi'i") (4=0 "Sunni") (99=.), into(shia)
codebook shia

* sect: the mirror image of shia (1 = Sunni, 0 = Shi'i), code 99 -> missing
recode V83 (4=1 "Sunni") (5=0 "Shi'i") (99=.), into(sect)
codebook sect

* kurd: 1 for REGION codes 40001 and 40002, 0 for everything else
gen kurd = inlist(REGION, 40001, 40002)
tab kurd treat

* Age is 2014 minus V76; age2 is its square
gen age = 2014 - V76
*hist age
gen age2 = age^2

* gender: 0 = Male (V77==1), 1 = Female (V77==2)
recode V77 (2=1 "Female") (1=0 "Male"), into(gender)

* Education: edu_lvl is a straight copy of V79; coledu collapses it to a
* college / no-college indicator
codebook V79
clonevar edu_lvl = V79
recode V79 (1 2 3 4 5=0 "No college education") (6 7=1 "College education"), into(coledu)

* Income: reverse V84 so that higher values mean a more comfortable household;
* codes 98 and 99 -> missing
codebook V84
recode V84 ///
    (4=1 "Our household income does not cover our expenses and we face significant difficulties in meeting our needs") ///
    (3=2 "Our household income does not cover our expenses and we face some difficulties in meeting our needs") ///
    (2=3 "Our household income covers our expenses without notable difficulties") ///
    (1=4 "Our household income covers our expenses well and we are able to save") ///
    (98 99=.), into(wealthcat)

* Religiosity: relcat is a reversed three-point scale, relbin a binary version;
* codes 98 and 99 -> missing in both
codebook V37
recode V37 (1=3 "Religious") (2=2 "Religious to some extent") (3=1 "Not religious") (98 99=.), into(relcat)
recode V37 (1 2=1 "Religious/religious to some extent") (3=0 "Not religious") (98 99=.), into(relbin)

* Write the Iraq-only survey file used by the later merge steps
save data/output/iraqarabtrans.dta, replace

*****************
**Building dataset: district ethnicity + shapefile + survey district lists
*****************

* Step 1: re-save the ESOC ethnicity file under data/output
use "data/raw/esoc-iraq-v3_ethnicity.dta", clear
save data/output/iraqeth.dta, replace
clear

* Step 2: convert the district shapefile to Stata format and inspect it
* NOTE(review): confirm that spshape2dta writes its .dta output where the
* -use- below expects it (data/shapefiles) and not into the working directory.
spshape2dta data/shapefiles/Iraq_district_boundaries_UTM, replace
use data/shapefiles/Iraq_district_boundaries_UTM, clear
spset
describe
list in 1/5

* Coordinates are planar so do not need to be reset
* ADM3NAME becomes the merge key "district"
rename ADM3NAME district
save data/output/iraqshp.dta, replace

* Step 3: ethnicity + shapefile attributes, matched districts only
use data/output/iraqeth.dta, clear
merge 1:1 district using data/output/iraqshp, keep(match) nogenerate
save data/output/iraqethspat.dta, replace

* Sunni and Shia population shares (percent) from the 2003 CIA population columns
gen pctsunni = 100 * (sunni_pop_CIA_2003 / total_pop_CIA_2003)
gen pctshia  = 100 * (shia_pop_CIA_2003 / total_pop_CIA_2003)
save data/output/iraqethspat.dta, replace

* Step 4: list of Arab Trans. survey districts (Excel -> dta), matched districts only
import excel "data/raw/iraqarabtransdistricts.xls", sheet("Sheet1") firstrow clear
save data/output/atdistricts.dta, replace

use data/output/iraqethspat.dta, clear
merge 1:1 district using data/output/atdistricts.dta, keep(match) nogenerate
* A blank survey-district flag is treated as 0
replace distarabtrans = 0 if distarabtrans==.

* Numeric district identifier
encode district, gen(disid)
save data/output/iraqethspat.dta, replace

* Step 5: before / control / treatment group columns per district
* (before and after (and both) districts), matched districts only
import excel "data/raw/survmapvars.xls", sheet("Sheet1") firstrow clear
save data/output/atdistricts1.dta, replace

use data/output/iraqethspat.dta, clear
merge 1:1 district using data/output/atdistricts1.dta, keep(match) nogenerate

save data/output/iraqethspat.dta, replace

* ---- ESOC significant-incident (SIGACT) data ----
* Sum every column from SIGACT through ied_total within district, then pass
* the result through an Excel export/import before saving it as the merge file.
use "data/raw/esoc-iraq-v3_sigact_district-year.dta", clear
collapse (sum) SIGACT-ied_total, by(district)
save data/output/sigact.dta, replace
export excel using "data/output/sigact", firstrow(variables) replace
import excel "data/output/sigact.xls", sheet("Sheet1") firstrow clear
save data/output/sigact.dta, replace

* Attach to the district file; only districts present in both are kept
use data/output/iraqethspat.dta, clear
merge 1:1 district using data/output/sigact, keep(match) nogenerate
save data/output/iraqethspat.dta, replace

* ---- ESOC district-level context data ----
use "data/raw/esoc-iraq-v3_ilcs-district.dta", clear
rename District district
save data/output/distcon.dta, replace

* No filtering on match status here
* Four districts without contextual info. (Hatra; al-Rutba; al-Ba'aj; Dibis)
use data/output/iraqethspat.dta, clear
merge 1:1 district using data/output/distcon, nogenerate
save data/output/iraqethspat.dta, replace

* ---- ESOC district-level economic data ----
* District means of every column from hhinc_q1 through erate
use "data/raw/esoc-iraq-v3_econfactors.dta", clear
collapse (mean) hhinc_q1-erate, by(district)
save data/output/distecon.dta, replace

use data/output/iraqethspat.dta, clear
merge 1:1 district using data/output/distecon, nogenerate
save data/output/iraqethspat.dta, replace

* ---- Iraq Body Count data ----
* District sums of every column from insurgent through sectarian; rows with no
* district name are discarded, then the same Excel round trip as for SIGACT
use "data/raw/esoc-iraq-v3_ibc.dta", clear
collapse (sum) insurgent-sectarian, by(district)
drop if missing(district)
save data/output/secvio.dta, replace
export excel using "data/output/secvio", firstrow(variables) replace
import excel "data/output/secvio.xls", sheet("Sheet1") firstrow clear
save data/output/secvio.dta, replace

use data/output/iraqethspat.dta, clear
merge 1:1 district using data/output/secvio, nogenerate
* Districts with no body-count record get zero counts
replace insurgent = 0 if insurgent==.
replace sectarian = 0 if sectarian==.
* The key is renamed to distesoc, the name used by the survey merge further down
rename district distesoc
save data/output/iraqethspat.dta, replace

* ---- Merge with Arab Transformations survey data ----
use "data/output/iraqarabtrans.dta"
* String version of the labelled DISTRICT variable, used as the first merge key
decode DISTRICT, gen(district)
merge m:1 district using data/raw/arabtransdistricts, nogenerate

* Attach district-level data by distesoc; rows found only in the district
* file are discarded, survey rows are always retained
merge m:1 distesoc using data/output/iraqethspat, keep(master match) nogenerate

* Attach district coordinates
// 34 not merged because these are in district "23"
merge m:1 distesoc using data/raw/distesoccoords, nogenerate

* ---- Distance to Mosul, using Mosul centroid coordinates ----
gen latmos = 36.07588577
gen lonmos = 42.9859314
geodist lat lon latmos lonmos, gen(mosdist)

* Transformations of the distance term
gen sqmosdist    = sqrt(mosdist)
gen invmosdist   = mosdist^-1
gen sqinvmosdist = sqrt(invmosdist)
gen lnmosdist    = ln(mosdist)

// Create identification and civil war worry variables

*V28: "If you were being asked to identify yourself, with which of the following would you most closely identify?"
codebook V28

* One indicator per identity category (1 = that category, 0 = any of the other
* four), plus the five-category variable ident; codes 6 and 98 -> missing.
recode V28(1=1 "Nation")(2 3 4 5=0 "Other")(6 98=.), into(natident)
recode V28(2=1 "Arab")(1 3 4 5=0 "Other")(6 98=.), into(arabident)
recode V28(3=1 "Islam")(1 2 4 5=0 "Other")(6 98=.), into(islident)
recode V28(4=1 "Region")(1 2 3 5=0 "Other")(6 98=.), into(regident)
recode V28(5=1 "Community")(1 2 3 4 =0 "Other")(6 98=.), into(commident)
recode V28(1=1 "Nation")(2=2 "Arab")(3=3 "Islam")(4=4 "Region")(5=5 "Community")(6 98=.), into(ident)

*V16E: To what degree are you worried about: "A civil war in my country"
* civwarint keeps the original scale; civwarworry is a binary split.
* Codes 98 and 99 -> missing in both.
recode V16E(98 99=.), into(civwarint)
recode V16E(1 2=0 "Not at all/Not much")(3 4=1 "Much/Very much")(98 99=.), into(civwarworry)

//Create respondents and time marker variables
* The value label of DATEINT is parsed as a year-month-day date.
decode DATEINT, gen(date1)
gen datevar=date(date1,"YMD")
*br DATEINT date1 datevar
gen double longdate = date(date1,"YMD")
format longdate %td

* Number of respondents interviewed on each date (all respondents)
gen resp=1
bysort datevar: egen rspdts=total(resp)

* Same count restricted to respondents with kurd==0; missing for the rest
gen resp1=1
bysort datevar: egen rspdts1=total(resp1) if kurd==0

* time: 1 = before 10 June 2014, 2 = on that day, 3 = after it.
* td(10jun2014) is the daily date 19884. Missing compares as larger than any
* number in Stata, so unparsed dates are excluded explicitly and stay missing
* instead of landing in the "after" category.
gen time=1 if datevar<td(10jun2014)
replace time=2 if datevar==td(10jun2014)
replace time=3 if datevar>td(10jun2014) & !missing(datevar)

*****************
** Coding further variables to introduce
*****************
// Attitudes to Islam in public life: candidate items for a factor.
// In every recode of this section codes 98 and 99 are set to missing and all
// other values are carried over unchanged. Variables are generated in the same
// order as before: isl1-isl6, isledu, isl7-isl12, relelec, isl13-isl15.
//
//   isl1   V27B  A system in which only Islamic parties compete in parliamentary elections
//   isl2   V27D  A system governed by Islamic law in which there are no political parties or elections
//                NOTE(review): the earlier comment attributed this wording to V27C while the
//                code reads V27D -- confirm against the codebook
//   isl3   V29J  Trust in religious leaders
//   isl4   V38A  Democracy is a Western form of government that is not compatible with Islam
//   isl5   V38B  Islam requires that in a Muslim country the political rights of non-Muslims
//                should be less to those of Muslims
//   isl6   V38C  Banks in Muslim countries must be forbidden from charging even modest interest
//                on loans because this is forbidden by Islam
//   isledu V38D  It is acceptable in Islam for male and female university students to attend
//                classes together
recode V27B V27D V29J V38A V38B V38C V38D (98 99=.), into(isl1 isl2 isl3 isl4 isl5 isl6 isledu)

// isl7: reversed V38D (5 minus the item)
// negative factor loading on this so will exclude--think respondents potentially confused
gen isl7 = 5 - isledu

//   isl8    V38E  If a Muslim converts to another religion, s/he must be punished by execution
//   (V38F, "In Islam, a woman should dress modestly, but Islam does not require that she wear
//    a hijab", is not included as unclear implications)
//   isl9    V38G  I prefer a religious party over a non-religious based political party
//   isl10   V39B  The government and parliament should implement only the laws derived from the sharia
//   isl11   V39D  The government and parliament should make criminal law according to sharia
//   isl12   V39E  The government and parliament should make personal status/family law according to sharia
//   relelec V40A  Religious clerics should not influence how people vote in elections
recode V38E V38G V39B V39D V39E V40A (98 99=.), into(isl8 isl9 isl10 isl11 isl12 relelec)

// isl13: reversed V40A (5 minus the item)
gen isl13 = 5 - relelec

//   isl14  V40B  Religious clerics should have influence over the decisions of government
//   isl15  V40C  Religious practice is a private matter and should be separated from socio-economic life
recode V40B V40C (98 99=.), into(isl14 isl15)


// Contextual variables transformed
gen sqsect   = sqrt(sectarian)
gen sqsigact = sqrt(SIGACT)

gen milperthou = militia*1000
// NOTE(review): the name suggests "per thousand" but the multiplier here is 100 -- confirm
gen clanperthou = clan_ties*100

// Further survey items with codes 98 and 99 set to missing
//worth considerring V59A_1 onwards
//   newsint   V55
//   secint    V12B  Neighbourhood security rating
//   warint    V16C
//   ethvioint V16F
//   secnatint V14B
//   secregint V13B
recode V55 V12B V16C V16F V14B V13B (98 99=.), into(newsint secint warint ethvioint secnatint secregint)

** Relabel vars.
* Variable labels for the main analysis variables. Each variable is labelled
* once (the repeated pctshia / pctsunni statements were redundant).

lab var natident "National identification"
lab var arabident "Arab identification"
lab var islident "Islamic identification"
lab var treat "Treatment"
lab var shia "Shi'i"
lab var pctsunni "% Sunni"
lab var pctshia "% Shi'i"
lab var age "Age"
lab var age2 "Age2"
lab var gender "Gender"
lab var coledu "College education"
lab var sqmosdist "Dist. from Mosul"
lab var sqsect "Prehist. violence"
lab var milperthou "Militia presence"
lab var clan_ties "Tribal ties"
lab var clanperthou "Tribal ties"

* islfact1 is not generated anywhere in this script, so an unconditional
* -label variable- would stop the run with "variable islfact1 not found".
* Label it only when it is present.
capture confirm variable islfact1
if !_rc {
    lab var islfact1 "Islam factor"
}
else {
    display as text "note: islfact1 not found; label skipped"
}

* First write of the merged file; it is overwritten at the end of the script
save data/output/iraqmerged.dta, replace

* Additional district-level covariates, each labelled where it is created

* District area, copied from AREA_KM2
gen disarea = AREA_KM2
lab var disarea "Dist. area"

* Urban indicator from F_TYPE: 1 stays 1 ("Urban"), 2 becomes 0 ("Rural")
recode F_TYPE (2=0 "Rural") (1=1 "Urban"), into(urban)
lab var urban "Urban/rural"

* Population density: landscan_pop divided by district area
gen popdens = landscan_pop / disarea
lab var popdens "Pop. dens."

*code additional individual-level covariates
* In -recode-, the catch-all rule "*" (else) also matches missing values.
* The indicator variables below therefore use "nonmissing=0" as their last
* rule, so that a respondent whose source value is missing stays missing
* rather than being coded 0 on every indicator.

* voted: 1 = voted, 0 = did not vote, anything else -> missing
recode V25(1=1 "Voted")(2=0 "Didn't vote")(*=.), into(voted)

* Hajj: count version and binary version; codes 98 and 99 -> missing
recode V73(1=0 "Never")(2=1 "Once")(3=2 "Twice")(4 =3 "Three+")(98 99=.),into(hajj)
recode V73(1=0 "No Hajj")(2 3 4=1 "Hajj")(98 99=.),into(hajjbin)

* Employment-status indicators from V85; codes 98 and 99 -> missing
recode V85(14=1)(98 99=.)(nonmissing=0), into(student)
recode V85(12=1)(98 99=.)(nonmissing=0), into(unemp)
recode V85(11=1)(98 99=.)(nonmissing=0), into(housewife)
recode V85(13=1)(98 99=.)(nonmissing=0), into(retired)
recode V85(2 3 4 5 6 7 8 9 10 =1)(98 99=.)(nonmissing=0), into(inwork)

* Categorical employment status. The "In work" rule lists the same codes as
* inwork above (codes 8 and 9 were previously left out and so passed through
* unrecoded).
* NOTE(review): V85 code 1 is matched by no rule, so it is carried over as 1
* and picks up the "Student" label; inwork also codes it 0. Confirm what
* code 1 means in the codebook and where it belongs.
recode V85(14=1 "Student")(12=2 "Unemployed")(11=3 "Housewife")(13=4 "Retired")(2 3 4 5 6 7 8 9 10 = 5 "In work")(98 99=.), into (empstat)

* Self-rated health (V70) and one indicator per response category
recode V70(98 99=.), into (healthstat)
recode healthstat(1=1)(nonmissing=0), into(healthvp)
lab var healthvp "Health v. poor"
recode healthstat(2=1)(nonmissing=0), into(healthp)
lab var healthp "Health poor"
recode healthstat(3=1)(nonmissing=0), into(healthf)
lab var healthf "Health fair"
recode healthstat(4=1)(nonmissing=0), into(healthg)
lab var healthg "Health good"
recode healthstat(5=1)(nonmissing=0), into(healthvg)
lab var healthvg "Health v. good"

* Wealth: same codes as wealthcat with short labels, plus one indicator per category
recode wealthcat(1=1 "Wealth low")(2=2 "Wealth low/mid")(3=3 "Wealth mid/high")(4=4 "Wealth high"),into(wealthcatr)
lab var wealthcatr "Wealth (categorical)"
recode wealthcatr(1=1)(nonmissing=0), into(wealthl)
lab var wealthl "Wealth low"
recode wealthcatr(2=1)(nonmissing=0), into(wealthlm)
lab var wealthlm "Wealth low/mid"
recode wealthcatr(3=1)(nonmissing=0), into(wealthmh)
lab var wealthmh "Wealth mid/high"
recode wealthcatr(4=1)(nonmissing=0), into(wealthh)
lab var wealthh "Wealth high"

* Religiosity: one indicator per relcat category
recode relcat(1=1)(nonmissing=0),into(relnr)
lab var relnr "Not religious"
recode relcat(2=1)(nonmissing=0),into(rels)
lab var rels "Somewhat religious"
recode relcat(3=1)(nonmissing=0),into(rel)
lab var rel "Religious"

lab var voted "Voted last elec."
lab var hajj "Hajj"
lab var student "Student"
lab var unemp "Unemployed"
lab var housewife "Housewife"
lab var retired "Retired"
lab var inwork "In work"
lab var empstat "Employment status"
lab var relcat "Religiosity"
lab var healthstat "Health status"

* need_sum, need_sum1 and need_sum2 are not generated in this script
* (presumably they arrive with one of the merged district files -- verify).
* They are labelled only when all three are present, so a missing variable
* no longer aborts the run.
capture confirm variable need_sum need_sum1 need_sum2
if !_rc {
    lab var need_sum "Need categories"
    lab var need_sum1 "Need categories 1"
    lab var need_sum2 "Need categories 2"
}
else {
    display as text "note: need_sum variables not all found; labels skipped"
}

* Generalised trust: V2 with codes 98 and 99 set to missing
recode V2 (98 99=.), into(gentrust)
lab var gentrust "Trust in people"

* Interaction of the treatment dummy with the Shi'i dummy
gen treatshi = shia * treat
lab var treatshi "Treatment*Shi'i"

* REGION is known as regid from here on
rename REGION regid

* Religion questions: rel1-rel12 and their row mean.
* Recoded items set codes 98 and 99 to missing and carry every other value
* over unchanged; reversed items are 5 minus a variable recoded earlier in the
* script (isledu, relelec, isl15). Variables are created in the order
* rel1, rel2, ..., rel12.
*
*   rel1   V38A  Democracy is a Western form of government that is not compatible with Islam
*   rel2   V38B  Islam requires that in a Muslim country the political rights of non-Muslims
*                should be less to those of Muslims
*   rel3   V38C  Banks in Muslim countries must be forbidden from charging even modest interest
*                on loans because this is forbidden by Islam
recode V38A V38B V38C (98 99=.), into(rel1 rel2 rel3)

*   rel4   V38D  It is acceptable in Islam for male and female university students to attend
*                classes together (reversed)
gen rel4 = 5 - isledu

*   rel5   V38E  If a Muslim converts to another religion, s/he must be punished by execution
*   (V38F, "In Islam, a woman should dress modestly, but Islam does not require that she wear
*    a hijab", is not included as unclear implications)
*   rel6   V38G  I prefer a religious party over a non-religious based political party
*   rel7   V39B  The government and parliament should implement only the laws derived from the sharia
*   rel8   V39D  The government and parliament should make criminal law according to sharia
*   rel9   V39E  The government and parliament should make personal status/family law according to sharia
recode V38E V38G V39B V39D V39E (98 99=.), into(rel5 rel6 rel7 rel8 rel9)

*   rel10  V40A  Religious clerics should not influence how people vote in elections (reversed)
gen rel10 = 5 - relelec

*   rel11  V40B  Religious clerics should have influence over the decisions of government
recode V40B (98 99=.), into(rel11)

*   rel12  V40C  Religious practice is a private matter and should be separated from
*                socio-economic life (reversed)
gen rel12 = 5 - isl15

* Variable labels
lab var rel1  "Democ. unIslamic"
lab var rel2  "Islam rights"
lab var rel3  "Banks Islam"
lab var rel4  "Islam women classes"
lab var rel5  "Islam convert"
lab var rel6  "Rel. parties"
lab var rel7  "Sharia laws"
lab var rel8  "Sharia crim. laws"
lab var rel9  "Sharia fam. laws"
lab var rel10 "Clerics elections"
lab var rel11 "Clerics govt."
lab var rel12 "Religion private"

* Row mean over the twelve items
egen relmean = rowmean(rel1 rel2 rel3 rel4 rel5 rel6 rel7 rel8 rel9 rel10 rel11 rel12)
lab var relmean "Mean relig. attitudes"


// Add information on survey completion times
* The timings are read from the SPSS data-entry file inside preserve/restore,
* so the merged data in memory are untouched while compl_times.dta is built.
* Date and time-trend variables that the earlier version derived here were
* all discarded by the -keep- below and have been removed.
preserve
import spss using "data/raw/ArabTrans_data entry_june 24 Filter.sav", clear

* Interview start and end as time of day. TIMESTART and TIMEEND are rendered
* as HH:MM:SS strings and parsed back with clock(). Clock values are in
* milliseconds and are stored as double, as Stata requires for %tc values.
generate timest = string(TIMESTART , "%-tcHH:MM:SS")
generate timeste = string(TIMEEND , "%-tcHH:MM:SS")
generate double start = clock(timest,"hms#")
generate double end = clock(timeste,"hms#")
format start %-tcHH:MM:SS
format end %-tcHH:MM:SS

* Interview length in milliseconds, seconds and minutes
gen double intl = end - start
gen double intlsecs = intl/1000
gen double intlmins = intlsecs/60

* Match key: INTERVIEW and V76 converted to strings and concatenated.
* It is built the same way on the processed data after -restore-.
tostring INTERVIEW, gen(intst)
tostring V76, gen(agest)
gen ID_INT = intst + agest

rename V76 unproc_byear
keep ID_INT intlsecs intlmins unproc_byear
* Lists any repeated key; the 1:1 merge below requires ID_INT to be unique
duplicates list ID_INT //generates unique ID to match on between unprocessed and processed survey data

save data/output/compl_times.dta, replace

restore

//merge completion times
tostring INTERVIEW, gen(intst)
tostring V76, gen(agest)
gen ID_INT = intst + agest
*unmatched are because the survey completion times are from uncleaned data
* Only respondents found in both files are kept
merge 1:1 ID_INT using data/output/compl_times.dta, keep(match) nogenerate

//generate missing response var.
* nmis = per-respondent count of survey items answered with code 98 or 99.
* Built inside preserve/restore on throw-away copies of the items, saved to
* nmis.dta, then merged back on ID_INT.

preserve
* Survey items to scan. The list is assigned as plain macro text (no "=")
* so that it is not evaluated as a string expression.
local qvars ///
    V1 V2 V3_1 V3_2 V3_3 V3_4 V3_5 V3_6 V3_7 V3_8 V3_8Army V3_8Terror V3_8Ed V3_8Heal ///
    V3_8SyrRef V3_8Refug V3_8ArabS V3_8Status V3_8Other V3_98 V3_99 ///
    V4 V5 V6A V6B V6C V7A V7B V7C V8 V9 V10 V11A V11B V11C V12A V12B V12C ///
    V13A V13B V13C V14A V14B V14C V15A V15B V15C ///
    V16A V16B V16C V16D V16E V16F V16G V16H V16I V16J V17 V18A V18B V19 V20 V21 V22 ///
    V23A V23B V23C V24A V24B V24C V24D V24E V25 ///
    V26A V26B V26C V26D V26E V26F V26G V26H V27A V27B V27C V27D V28 ///
    V29A V29B V29C V29D V29E V29F V29G V29H V29I V29J V30A V30B V31 ///
    V32A V32B V32C V32D V32E V32F V32G V32H V33 V34 V35 V36 V37 ///
    V38A V38B V38C V38D V38E V38F V38G V39A V39B V39C V39D V39E V40A V40B V40C ///
    V41 V42 V42_1 V43 V44 V45A V45B_1 V45B_2 V45C_1 V45C_2 ///
    V46_1 V46_2 V46_3 V46_4 V46_5 V46_6 V46_7 V46_DK V46_RA ///
    V47_1 V47_2 V47_3 V47_4 V47_5 V47_6 V47_7 V47_DK V47_RA ///
    V48A V48B V48C V49A V49B V49C ///
    V50A V50B V50C V50D V50E V50F V50G V50H V50I V51 ///
    V52_1 V52_2 V52_3 V52_4 V52_5 V52_6 V52_7 V52_8 V52_9 V52_DK V52_RA ///
    V53_1 V53_2 V53_3 V53_4 V53_5 V53_6 V53_7 V53_8 V53_9 V53_10 ///
    V53_11Pal V53_11KSA V53_11Qat V53_11Kuw V53_11Leb V53_11UAE V53_11Oman V53_11GS ///
    V53_00 V53_DK V53_RA V54 V55 ///
    V56A V56B V56C V56D V56E V57A V57B V57C V57D V57E ///
    V58A V58B V58C V58D V58E V58F V58G V58H V58I V58J V58K V58L ///
    V59A_1 V59A_2 V59A_3 V59A_4 V59A_5 V59A_6 V59A_7 V59A_8 V59A_9 V59A_10 ///
    V59A_11 V59A_12 V59A_13 V59A_14 V59A_15 V59A_16 V59A_17 V59A_MISS V59A_DK V59A_RA ///
    V59B_1 V59B_2 V59B_3 V59B_4 V59B_5 V59B_6 V59B_7 V59B_8 V59B_9 V59B_10 ///
    V59B_11 V59B_12 V59B_13 V59B_14 V59B_15 V59B_16 V59B_17 V59B_MISS V59B_DK V59B_RA ///
    V60 V61 V62A V62B V62C V62D V62E V63 ///
    V64_1 V64_2 V64_3 V64_4 V64_5 V64_6 V64_7 V64_8 V64_RA ///
    V65A V65B V65C V65D V65E ///
    V66A V66B V66C V66D V66E V66F V66G V66H ///
    V67A V67B V67C V67D V67E V67F ///
    V68_1 V68_2 V68_3 V68_4 V68_5 V68_6 V68_7 V68_8 V68_9 V68_DK V68_RA ///
    V69A V69B V69C V69D V69E V69F V69G V69H V69I V70 ///
    V71A V71B V71C V71D V71E V71F V71G V71H V71I V71J V71K ///
    V72 V73 V74 V74_1 V74_2 V75 V76 V77 V78 V79 V80 V81 V81_1 V82 V83 V84 V85

* For each item make a copy <item>_m in which system-missing becomes 0 and
* codes 98 and 99 become missing, so that only those two codes are counted.
foreach var of local qvars {
    clonevar `var'_m = `var'
    replace `var'_m = 0 if `var'_m == .
    replace `var'_m = . if `var'_m == 98|`var'_m ==99
}

* Count the missing copies per respondent. The copies are created one after
* another, so V1_m-V85_m spans exactly the variables generated in the loop.
* rowmiss() is the built-in counterpart of the user-written rmiss2(); every
* copy is numeric here (the replace statements above require it).
egen nmis=rowmiss(V1_m-V85_m)

keep ID_INT nmis
* replace added: without it a second run stops because nmis.dta already exists
save data/output/nmis.dta, replace

restore

* NOTE(review): unlike the earlier merges, _merge is left in the data here and
* is therefore saved into iraqmerged.dta; kept so the output file is unchanged.
merge 1:1 ID_INT using data/output/nmis.dta 

save data/output/iraqmerged.dta, replace
